---
title: Title
keywords: fastai
sidebar: home_sidebar
nb_path: "EDA.ipynb"
---
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow_io as tfio
import tensorflow as tf
from IPython import display
import random
# Seed Python's RNG so any later random sampling is repeatable across runs.
random.seed(42)

# Presumably eBird species codes (one dataset sub-directory each); only the
# first entry is explored in this section.
ebirds = ['norcar', 'blujay', 'bkcchi']

# glob() makes no guarantee about result order, so sort the paths to keep
# positional indices into `files` stable between runs and machines.
files = sorted(glob(f'dataset/{ebirds[0]}/*'))

# Decode every recording up front. `decoded_audio[i]` and `file_shape[i]`
# both describe `files[i]`.
file_shape, decoded_audio = [], []
for file in files:
    audio = tf.io.read_file(file)
    waveform = tfio.audio.decode_mp3(audio)
    decoded_audio.append(waveform)
    file_shape.append(waveform.shape)
# The statistics below are undefined without at least one decoded file.
# Fail early with a readable message instead of NumPy's zero-size reduction
# error (same exception type as before).
if not file_shape:
    raise ValueError('no decoded audio files found; cannot compute length statistics')

# Second shape entry of each decoded file, treated here as its channel count,
# and how many files have each distinct count.
# Bare expressions like the next line only display a value when run as the
# last statement of a notebook cell.
num_channels = np.array([shape[1] for shape in file_shape])
np.unique(num_channels, return_counts=True)

# First shape entry of each decoded file, treated here as its length.
file_lengths = np.array([shape[0] for shape in file_shape])

# Ratio between the longest and the shortest recording.
file_lengths.max() / file_lengths.min()
file_lengths.argmax()

# Distribution of recording lengths.
plt.hist(file_lengths)

# Positions in `files` of the shortest and longest recordings.
shortest_file_idx = file_lengths.argmin()
longest_file_idx = file_lengths.argmax()

# Audio player for the longest recording (rendered by the notebook front end).
display.Audio(files[longest_file_idx])